This is the version without code.

1 Objective

The goal of this markdown is to visualize the antibody reactivity data from multiplex immunoassays performed by Tenzin Tashi and compiled by Aditi Upadhye for the CTC PDT funded project “Evaluating the dynamics and function of naturally acquired humoral immune responses to Plasmodium vivax vaccine candidate antigens in a longitudinal study.”

The data consists of IgG responses specific against several P. vivax antigens using plasma obtained for Brazilian patients with acute vivax malaria and followed up at days 30, 60, and 180 after initial enrollment. This is an initial attempt to analyze the decay of antibody responses over time.

The major findings of this study to date are:

  1. The antigens with the highest seroprevalence after acute P. vivax malaria are Pv41, PvGAMA, PvRBP2b.
  2. Antigens that produce the longest-lived IgG responses after acute vivax malaria are PVX_081550, PvRBP2b, and PvMSP3a.

Table 1. Participant characteristics
Fig. 1. Correlations of antigen-specific IgG responses
Fig. 2. Seropositivity at each timepoint by antigen for subjects with data
Fig. 3. Reactivity over time points, faceted by Pv antigen, as ln(max FI) per subject, exponential decay
[Table 2. Half-lives, linear-mixed effects regression, adjusted analysis]
[Table 3. Half-lives, beta-distributed response linear mixed effects model, adjusted analysis]

2 Setup

3 Read in data

#read in all data
alldat <- readRDS(paste0(datadir,"Brazil Pvivax dataframe Aditi 02282022.rds")) %>%
  dplyr::filter(Dilution == "1:400") %>%
  mutate(Antigen = gsub("DBP-FL", "PvDBP-FL", Antigen)) %>%
  mutate(Antigen = gsub("MSP3a", "PvMSP3a", Antigen)) %>%
  mutate(Antigen = gsub("EBP", "PvEBP", Antigen)) %>%
  mutate(Antigen = gsub("CD4", "ratCD4", Antigen)) %>%
  mutate(Antigen = factor(Antigen, levels = c("BSA","ratCD4", "Thioredoxin", "Tetanus Toxoid",
                                              "Pv12",
                                              "Pv41",
                                              "PvDBP-FL",
                                              "PvEBP",
                                              "PvGAMA",
                                              "PvMSP3a",
                                              "PvRBP1a",
                                              "PvRBP2b",
                                              "Pvx081550"))) %>%
  mutate(Antigen = gsub("Pvx081550", "PVX_081550", Antigen)) %>%
  mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen)) %>%
  mutate(AntigenType = factor(AntigenType, levels = c("background control", "reactivity control", "P. vivax antigen")))

#read in cleaned data
alldat_norm_bkgd_rem <- readRDS(paste0(datadir, "alldat_norm_bkgd_rem.rds"))  %>%
  mutate(Antigen = gsub("Tetanus Toxoid", "tetanus toxoid", Antigen))

4 Make table of participant characteristics

##                                                   Stratified by City
##                                                    Overall          
##   n                                                47               
##   Gender (%)                                       34 (72.3)        
##   Age, years (median [range])                      32.0 [12.0, 64.0]
##   Years residence in Amazon basin (median [range]) 26.0 [7.0, 64.0] 
##                                                   Stratified by City
##                                                    Acrelândia       
##   n                                                10               
##   Gender (%)                                       7 (70.0)         
##   Age, years (median [range])                      31.0 [14.0, 55.0]
##   Years residence in Amazon basin (median [range]) 21.0 [8.0, 43.0] 
##                                                   Stratified by City
##                                                    Califónia        
##   n                                                6                
##   Gender (%)                                       3 (50.0)         
##   Age, years (median [range])                      41.0 [29.0, 56.0]
##   Years residence in Amazon basin (median [range]) 33.0 [7.0, 54.0] 
##                                                   Stratified by City
##                                                    Jéssica          
##   n                                                19               
##   Gender (%)                                       15 (78.9)        
##   Age, years (median [range])                      30.0 [18.0, 56.0]
##   Years residence in Amazon basin (median [range]) 30.0 [18.0, 54.0]
##                                                   Stratified by City
##                                                    Plácido           p    
##   n                                                12                     
##   Gender (%)                                       9 (75.0)          0.576
##   Age, years (median [range])                      23.0 [12.0, 64.0] 0.183
##   Years residence in Amazon basin (median [range]) 19.0 [7.0, 64.0]  0.033
##                                                   Stratified by City
##                                                    test   
##   n                                                       
##   Gender (%)                                              
##   Age, years (median [range])                      nonnorm
##   Years residence in Amazon basin (median [range]) nonnorm

4.1 Table 1. Participant characteristics

5 Correlations

Make correlation matrices for each timepoint

Assess number of samples per timepoint

## `summarise()` has grouped output by 'Antigen'. You can override using the
## `.groups` argument.
## # A tibble: 36 × 3
## # Groups:   Antigen [9]
##    Antigen  Timepoint     n
##    <chr>        <dbl> <int>
##  1 Pv12             0    44
##  2 Pv12            30    40
##  3 Pv12            60    22
##  4 Pv12           180    21
##  5 Pv41             0    44
##  6 Pv41            30    40
##  7 Pv41            60    22
##  8 Pv41           180    21
##  9 PvDBP-FL         0    44
## 10 PvDBP-FL        30    40
## # … with 26 more rows

Plot correlation heatmaps by timepoint

5.1 Fig. 1. Correlations of antigen-specific IgG responses

day 0

corrplot::corrplot(timepoint_0_mat$r,
                   method = "square",
                   order = "hclust",
                   hclust.method = "ward.D2",
                   type = "upper",
                   p.mat = timepoint_0_mat$p,
                   insig = "label_sig",
                   sig.level = c(.001, .01, .05),
                   pch.cex = 1,
                   pch.col = "white",
                   tl.col="black",
                   addCoef.col = "black",
                   outline=FALSE)

day 30

corrplot::corrplot(timepoint_30_mat$r,
                   method = "square",
                   order = "hclust",
                   hclust.method = "ward.D2",
                   type = "upper",
                   p.mat = timepoint_30_mat$p,
                   insig = "label_sig",
                   sig.level = c(.001, .01, .05),
                   pch.cex = 1,
                   pch.col = "white",
                   tl.col="black",
                   addCoef.col = "black",
                   outline=FALSE)

day 60

corrplot::corrplot(timepoint_60_mat$r,
                   method = "square",
                   order = "hclust",
                   hclust.method = "ward.D2",
                   type = "upper",
                   p.mat = timepoint_60_mat$p,
                   insig = "label_sig",
                   sig.level = c(.001, .01, .05),
                   pch.cex = 1,
                   pch.col = "white",
                   tl.col="black",
                   addCoef.col = "black",
                   outline=FALSE)

day 180

corrplot::corrplot(timepoint_180_mat$r,
                   method = "square",
                   order = "hclust",
                   hclust.method = "ward.D2",
                   type = "upper",
                   p.mat = timepoint_180_mat$p,
                   insig = "label_sig",
                   sig.level = c(.001, .01, .05),
                   pch.cex = 1,
                   pch.col = "white",
                   tl.col="black",
                   addCoef.col = "black",
                   outline=FALSE)

6 Seroprevalence

Before preceding further, we want to determine what a seropositive is using the North American controls to set a threshold for positivity (mean + 3 st dev).

We first filter out the North American controls, noting that only all were ran on Plate 1 except for NAC 1, which was run on all plates. For consistency, and being that we already normalized based on the positive control standard, we will only use NACs on Plate 1 for this.

Evaluate seroprevalence (% seropositive) for each timepoint, faceted by antigen

alldat_norm_bkgd_rem %>%
  filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
  filter(!is.na(Timepoint)) %>%
  filter(!is.na(seropositive)) %>%
  group_by(Antigen, Timepoint) %>%
  summarize(n = n(), seroprevalence = sum(seropositive)/n()) %>%
  mutate(timepoint = factor(Timepoint)) %>%
  mutate(timepoint = paste0(timepoint, " (n=", n, ")")) %>%
  mutate(timepoint = fct_reorder(timepoint, Timepoint, .desc = FALSE)) %>%
  ggplot(., aes(x=timepoint, y = seroprevalence)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::percent) +
  ylab("% seropositive") +
  xlab("days from acute vivax malaria") +
  geom_hline(yintercept = .50, linetype = "dotted", color="red") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
  facet_wrap(~Antigen)
Seroprevalence at each timepoint by antigen

(#fig:seroprevalence by antigen)Seroprevalence at each timepoint by antigen

The data shows that >50% seroprevalence is maintained for Pv41, PvGAMA, and PvRBP2 from acute presentation all the way up to 180 days. Pv12 and PvEBP also demonstrate more prolonged maintenance of seropositivity 180 days from acute vivax malaria.

We take advantage of the longitudinal data to ask how many people are seropositive at admission and either seroconvert or serorevert over time?

6.1 Fig. 2. Seropositivity at each timepoint by antigen for subjects with data

Subjects with all four time points

mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)

#with 0, 30, 60, 180
subjects_with_0_3_60_180 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
  rownames_to_column(var = "Subject") %>%
  as_tibble() %>%
  dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0 & `180` != 0) %>%
  .$Subject

length(unique(subjects_with_0_3_60_180))
## [1] 8
alldat_norm_bkgd_rem %>%
  filter(Subject %in% subjects_with_0_3_60_180) %>%
  filter(AntigenType == "P. vivax antigen" & Group == "Brazilian") %>%
  filter(!is.na(Timepoint)) %>%
  mutate(timepoint = factor(Timepoint)) %>%
  mutate(subject = factor(Subject)) %>%
  mutate(reactivity = factor(seropositive, levels = c(0,1), labels = c("seronegative","seropositive"))) %>%
  select(subject, Antigen, timepoint, FI_norm_bkgd_rem, reactivity, seropositive) %>%
  group_by(Antigen, timepoint) %>%
  mutate(subject = fct_reorder(subject, FI_norm_bkgd_rem, .desc = FALSE)) %>%
  ungroup() %>%
  ggplot(., aes(x=timepoint, y = subject, fill = reactivity)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_manual(values = c("gray80", "black")) +
  #scale_fill_viridis(name="reactivity", discrete = TRUE, option = "magma", direction = -1) +
  coord_equal() +
  facet_wrap(~Antigen, nrow = 1) +
  labs(x="days from acute vivax", y="subjects", title = "Seropositivity over time") +
  ggthemes::theme_tufte(base_family="Helvetica") + 
  theme(axis.ticks=element_blank(),
        axis.text=element_text(size=12),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.text=element_text(hjust=0),
        panel.spacing.x = unit(0.25, "cm"),
        panel.spacing.y = unit(0.25, "cm"),
        legend.title=element_text(size=8),
        legend.title.align=1,
        legend.text=element_text(size=8),
        legend.position="bottom",
        legend.key.size=unit(0.2, "cm"),
        legend.key.width=unit(1, "cm"))

7 Percent of max response

Lastly, we can also visualize as percent of max, which also does not need any normalization or background subtraction given these are canceled out. Hence, raw FI is used.

7.1 Wrangle data

Strategy is to group by subject and antigen and divide raw FI over max(FI) and express as a percentage.

alldat_norm_bkgd_rem_pctmax <- alldat_norm_bkgd_rem %>%
  group_by(Plate, Subject, Antigen) %>%
  mutate(FI_pct_max = FI_norm_bkgd_rem/max(FI_norm_bkgd_rem, na.rm = TRUE)) %>%
  ungroup()

Determine subjects who have data at days 0 and 30 and days 0, 30, 60.

mytable <- table("subject" = alldat_norm_bkgd_rem$Subject, "time point" = alldat_norm_bkgd_rem$Timepoint)

#subjects with at least day 0 and 30 data
subjects_with_0_30 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
  rownames_to_column(var = "Subject") %>%
  as_tibble() %>%
  dplyr::filter(`0` != 0 & `30` != 0) %>%
  .$Subject

length(unique(subjects_with_0_30))
## [1] 37
#with 0, 30, 60
subjects_with_0_3_60 <- as.data.frame.matrix(mytable, optional = TRUE, make.names = TRUE) %>%
  rownames_to_column(var = "Subject") %>%
  as_tibble() %>%
  dplyr::filter(`0` != 0 & `30` != 0 & `60` != 0) %>%
  .$Subject

length(unique(subjects_with_0_3_60))
## [1] 19

8 Explore models

Estimate the curve from day 0 through day 180 as exponential decay. For this, we determine the timepoint with the max FI for each antigen for each subject, and then analyze only values from the max timepoint onward to give the most accurate model of decay. This is because values prior to the max are not needed to determine decay and actually makes the decay model less accurate

You can plot the exponential decay fit using the following: (see https://stackoverflow.com/questions/41101841/exponential-fit-in-ggplot-r). By taking the natural log of the FI, you can then plot as linear decay.

Decay be approximated by as simple linear model. Since we’re using repeated measures, we will opt for a mixed effects model with random intercepts for subjects. Since we have different slopes for each antigen, we will use the do and tidy functions applied to lmer.

8.1 Calculate half-lives

Multiple linear regression with ln(reactivity) as the dependent variable and time (days) as the independent variable in separate models unadjusted or adjusted for years living in the malaria-endemic Amazon region. A separate model was run for each antigen. Half-life (t1/2) was calculated as ln(0.5)/estimate and further converted to months by dividing by 30. P values were adjusted for multiple comparisons (antigens) using the Benjamini-Hochberg (BH) procedure.

8.2 Clean data for model building

Requirements 1. Use only data from timepoint with max FI for each subject and antigen onward so as to accurately model decay 2. Select only subjects with at least two datapoints from max timepoint onward

myantigens <- c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid")
#not usd for manuscript
#https://academic.oup.com/abt/article/4/3/144/6309336
#tetanus antibodies: https://www.nejm.org/doi/full/10.1056/NEJMoa066092
library(lmerTest)
halflife_res_mixed <- alldat_norm_bkgd_rem_pctmax %>%
  select(c(Subject, Group, AntigenType, Antigen, Timepoint, Years_Endemic, gender, age, FI_norm_bkgd_rem, FI_pct_max)) %>%
  filter(Group == "Brazilian") %>% #filter only Brazilian samples
  filter(Subject != "pos_control") %>% #remove positive controls
  filter(!is.nan(FI_pct_max)) %>% #remove NaNs
  filter(Antigen %in% myantigens) %>% #filter on only Pv antigens that are have seroprevalence >50%
  left_join(., alldat %>%
              filter(Group == "Brazilian") %>% #filter only Brazilian samples
              filter(Subject != "pos_control") %>% #remove positive controls
              select(Subject, "Number of prior malaria episodes") %>%
              rename(Prior_malaria = "Number of prior malaria episodes") %>%
              distinct(Subject, .keep_all = TRUE) %>%
              mutate(Prior_malaria = as.integer(Prior_malaria)),
            by = "Subject") %>%
  mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "PvEBP", "Pv12", "PvMSP3a", "tetanus toxoid"))) %>% 
  mutate(Years_Endemic = as.numeric(Years_Endemic)) %>% #this line is the one that gives the "NAs introduced by coercion" message
  mutate(Years_Endemic = ifelse(is.na(Years_Endemic),median(Years_Endemic, na.rm = TRUE),Years_Endemic)) %>% #impute missing variable for 2 subjects
  mutate(Prior_malaria = ifelse(is.na(Prior_malaria),median(Prior_malaria, na.rm = TRUE),Prior_malaria)) %>% #impute missing variable for 4 subjects
  mutate(ln_FI_pct_max = log(FI_pct_max+0.0001)) %>%
  mutate(ln_FI_norm_bkgd_rem = log(FI_norm_bkgd_rem+1)) %>% 
  arrange(Subject, Antigen, Timepoint) %>%
  group_by(Subject, Antigen) %>%
  mutate(days_from_max_pct = Timepoint - Timepoint[which.max(FI_norm_bkgd_rem)]) %>%
  filter(between(row_number(),which.max(FI_norm_bkgd_rem), n())) %>%  #this line removes all values before the max so that we can estimate decay from the highest available value
  filter(n()>1) %>% #filter subjects with at least two data points
  ungroup()

#Subject 32 did not have tetanus toxoid response, so removed from analysis
halflife_res_mixed <- halflife_res_mixed %>%
  filter(!(Subject == "032" & Antigen == "tetanus toxoid"))

8.3 Fig. 3. Reactivity over time points, faceted by Pv antigen, as ln(max FI) per subject, exponential decay

Plot exponential decay

#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all <- halflife_res_mixed %>%
  ggplot(., aes(x=days_from_max_pct, y = FI_pct_max + 0.0001, fill= Antigen, group = Antigen, color = Antigen)) +
  #geom_point(colour="black",pch=21, size=2.5) +
  #geom_smooth(method ="lm", formula = y~x, alpha = 0.15) +
  geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
  geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
  scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
  scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
  ylab("percent of max antibody reactivity") +
  xlab("days after max antibody reactivity") +
  theme_bw() +
  theme(text = element_text(size=11),
        axis.ticks=element_blank(),
        #axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
        axis.text=element_text(size=6.5),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.background = element_blank(),
        panel.spacing.x = unit(0.25, "cm"),
        panel.spacing.y = unit(0.25, "cm"),
        legend.position = "top",
        legend.title=element_blank(),
        legend.title.align=1,
        legend.text=element_text(size=8),
        legend.key.size=unit(0.15, "cm"),
        legend.key.width=unit(0.15, "cm")) +
  scale_color_brewer(palette="Spectral") +
  scale_fill_brewer(palette="Spectral")
 
exponential_decay_plot_all

#refit model
nls_fit <- halflife_res_mixed %>%
  nls(formula = FI_pct_max + 0.0001 ~ a*exp(-b *days_from_max_pct), 
               start = c(a=10, b=0.01), data = .)
coef(nls_fit)
##           a           b 
## 0.951856825 0.009894891
#add small prior to FI_pct_max to avoid taking log of zero
exponential_decay_plot_all_facet <- exponential_decay_plot_all +
  stat_regline_equation(label.y = 1.07, label.x = 100, aes(label = ..adj.rr.label..), color = "gray10", cex = 3) +
  theme(legend.position = "none") +
  facet_wrap(~Antigen)
 
exponential_decay_plot_all_facet

Plot natural log of FI to linearize

#add small prior to FI_pct_max to avoid taking log of zero
linear_decay_plot_facet <- halflife_res_mixed %>%
  ggplot(., aes(x=days_from_max_pct, y = ln_FI_pct_max, group = Antigen, fill= Antigen, color = Antigen)) +
  geom_point(colour="black",pch=21, size=2.5) +
  geom_smooth(method ="lm", formula = y~x, alpha = 0.15) +
  #geom_smooth(method="glm", formula=y~x, method.args=list(family=gaussian(link="log")), alpha = 0.15, se = TRUE) +
  #geom_hline(yintercept = c(0.25,0.5,0.75,1), linetype = "dotted", color = "gray") +
  #scale_y_continuous(labels = scales::percent, breaks = c(0.25,0.5,0.75,1)) +
  scale_x_continuous(breaks = c(0,30,60,90,120,150,180)) +
  ylab("ln(proportion max reactivity)") +
  xlab("days after max antibody reactivity") +
  theme_bw() +
  scale_color_brewer(palette="Spectral") +
  scale_fill_brewer(palette="Spectral") +
  #stat_regline_equation(label.y = -5, label.x = 5, aes(label = ..rr.label..), color = "gray10") +
  stat_regline_equation(label.y = -6, label.x = 5, aes(label = ..adj.rr.label..), color = "gray10", cex = 3) +
  facet_wrap(~Antigen) +
  theme(text = element_text(size=11),
        axis.ticks=element_blank(),
        #axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
        axis.text=element_text(size=6.5),
        panel.border=element_blank(),
        plot.title=element_text(hjust=0),
        strip.background = element_blank(),
        panel.spacing.x = unit(0.25, "cm"),
        panel.spacing.y = unit(0.25, "cm"),
        # legend.title=element_text(size=8),
        # legend.title.align=1,
        # legend.text=element_text(size=8),
        # legend.key.size=unit(0.2, "cm"),
        # legend.key.width=unit(1, "cm"),
        legend.position="none") 

linear_decay_plot_facet

9 Linear mixed effects model using lmer

halflife_res_mixed <- halflife_res_mixed %>%
  left_join(., alldat %>%
              dplyr::select(c(Subject, city)) %>%
              filter(!duplicated(Subject)) %>%
              drop_na(),
            by = "Subject")

9.1 Unadjusted

library(tableone)
library(flextable)
library(officer)
source("customtab.R")

foo1 <- lmer_unadj_res %>%
  dplyr::select(Antigen, estimate, std.error, statistic, halflife_days, halflife_days_lowCI, halflife_days_highCI)
  
# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table 2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days) and subject as fixed and random effects, respectively. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_unadj_res <- custom_tab(foo1, header, footer)

9.2 Table 2. Half-lives, linear-mixed effects regression, unadjusted analysis

9.3 Adjusted

library(tableone)
library(flextable)
library(officer)
source("customtab.R")

foo2 <- lmer_adj_res %>%
  dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days, halflife_days_lowCI, halflife_days_highCI)

# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table S2. Antibody half-life determination using linear mixed-effects regression of antibody reactivity, adjusted analysis", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A linear mixed effects regression with ln(reactivity) as the dependent variable and time (days), sex, age, city, and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_lmer_adj_res <- custom_tab(foo2, header, footer)

9.4 Table S2. Half-lives, linear-mixed effects regression, adjusted analysis

10 Table S2. Include more measures and both models

unadj_and_adj <- lmer_unadj_res %>%
  dplyr::select(-c(effect, df, group, term, halflife_months, halflife_years, significant)) %>%
  left_join(., lmer_adj_res %>%
              dplyr::select(-c(effect, df, group, term, halflife_months, halflife_years, significant)),
  by = "Antigen")
colnames(unadj_and_adj) <- gsub("\\.x", "_unadj", colnames(unadj_and_adj))
colnames(unadj_and_adj) <- gsub("\\.y", "_adj", colnames(unadj_and_adj))
writexl::write_xlsx(unadj_and_adj, paste0(resdir, "Table S1 lmer both adj and unadj include prior malaria 06122022.xlsx"))

We can also use a linear mixed effects model with a beta-distributed response using FI_pct_max given that the y (dependent variable) is bounded by 0 and 1. This can be achieved from shifting the 0 and 1 values slight towards 0.5.

#https://stats.stackexchange.com/questions/347629/how-to-handle-bounded-0-1-dependent-variable-that-causes-one-to-fail-heterosce
#https://drizopoulos.github.io/GLMMadaptive/articles/Custom_Models.html#beta-mixed-effects-model

library(glmmTMB)

glmmTMB_res <- halflife_res_mixed %>% 
  mutate(ln_FI_pct_max = log(FI_pct_max + 1.0001)) %>% #add 1 to avoid taking log of 0, while keeping boundaries of ln transformed values between 0 and 1
  group_by(Antigen) %>%
  do(broomExtra::tidy(glmmTMB(ln_FI_pct_max ~ Years_Endemic + days_from_max_pct + (1|Subject), data = ., family = beta_family()), conf.int = TRUE)) %>%
  ungroup() %>%
  mutate(p.value = as.numeric(formatC(p.value, format = "e", digits = 2))) %>%
  mutate("BH-adjusted.p.value" = as.numeric(p.adjust(p.value, method = "BH"))) %>%
  mutate(halflife_days = log(0.5)/estimate) %>%
  mutate(halflife_days_lowCI = log(0.5)/conf.low) %>%
  mutate(halflife_days_highCI = log(0.5)/conf.high) %>%
  mutate(halflife_months = halflife_days/30) %>%
  mutate(halflife_years = halflife_days/365.25) %>%
  mutate(across(where(is.numeric), signif, 2)) %>%
  dplyr::filter(term == "days_from_max_pct") %>%
  mutate(significant = ifelse(.$"BH-adjusted.p.value"<0.05, "*", "NS")) %>%
  mutate("BH-adjusted.p.value" = formatC(.$"BH-adjusted.p.value", format = "e", digits = 2)) %>%
  arrange(desc(halflife_days)) %>%
  dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days, halflife_days_lowCI, halflife_days_highCI)
library(tableone)
library(flextable)
library(officer)
source("customtab.R")

# Rename first variable from n to No.
#tab1_df$Variable[1] <- "No."
# Set Table header
header <- str_squish(str_remove("Table 3. Antibody half-life determination using beta-distributed response linear mixed effects model of antibody reactivity as percent of max over time, adjusted analysis", "\n"))
# Set Table footer
footer <- str_squish(str_remove("A beta-distributed response linear mixed effects model with ln(reactivity) as the dependent variable and time (days) and years in the malaria-endemic Amazon region as fixed effects and subject as a random effect. Half-life (t1/2) was calculated as ln(0.5)/estimate.", "\n"))
# Set custom_tab() defaults
customtab_defaults()
# Create the flextable object
flextable_glmmTMB_res <- custom_tab(glmmTMB_res, header, footer)

10.1 Table X. Half-lives, beta-distributed response linear mixed effects model, adjusted analysis

Not included in manuscript.

11 Longley dataset

11.1 Compare with published Longley et al. PLoS NTD 2017 dataset207

https://www.nature.com/articles/s41591-020-0841-4

longley_dat_clean <- longley_dat %>%
  filter(site == "Brazil") %>%
  #mutate(Antigen = factor(Antigen, levels = c("Pv41", "PvGAMA", "PvRBP2b", "PVX_081550", "PvDBP-FL", "Pv12", "PvMSP3a"))) %>% 
  rename(Subject = `Subject ID`) %>%
  mutate(Week = as.numeric(gsub("w", "", visit))) %>%
  mutate(Day = Week*7) %>%
  mutate(Month = Week/4) %>%
  arrange(Subject, Antigen, Week) %>%
  select(Subject, Antigen, visit, Day, Week, Month, response) %>%
  group_by(Subject, Antigen, visit, Day, Week, Month) %>%
  summarise_at(vars(response), mean, na.rm = TRUE) %>%
  filter(Week != 36) %>% # responses at week 36 go up, so only use data from before week 36
  ungroup() %>%
  group_by(Subject, Antigen) %>%
  mutate(days_from_max_pct = Day - Day[which.max(response)]) %>%
  filter(between(row_number(),which.max(response), n())) %>%  #this line removes all values before the max so that we can estimate decay from the highest available value
  filter((response > 0 & Day == 0) | Day > 0) %>% #filter subjects with zero response
  filter(n()>1) %>% #filter subjects with at least two data points
  ungroup() %>%
  mutate(dummy_name = paste(Subject, Antigen, sep="_"))

#remove subjects where all values are zero 
longley_dat_keep <- longley_dat_clean %>%
  group_by(Subject, Antigen) %>%
  summarize(sum = sum(response)) %>%
  filter(sum > 0) %>%
  ungroup() %>%
  mutate(dummy_name = paste(Subject, Antigen, sep="_"))


longley_dat_filtered <- longley_dat_clean %>%
    mutate(dummy_name = paste(Subject, Antigen, sep="_")) %>%
    filter(dummy_name %in% longley_dat_keep$dummy_name)


lmer_adj_res_longley <- longley_dat_filtered %>%
  select(Subject, Antigen, Day, response) %>%
  mutate(ln_response = log(response+1)) %>% #add 1 to all values to avoid taking log of zero
  group_by(Antigen) %>%
  do(broomExtra::tidy(lmerTest::lmer(ln_response~Day + (1|Subject), data = .), conf.int = TRUE)) %>%
  ungroup() %>%
  mutate(p.value = as.numeric(formatC(p.value, format = "e", digits = 2))) %>%
  mutate("BH-adjusted.p.value" = as.numeric(p.adjust(p.value, method = "BH"))) %>%
  mutate(halflife_days_longley_recalc = log(0.5)/estimate) %>%
  mutate(halflife_days_lowCI_longley_recalc = log(0.5)/conf.low) %>%
  mutate(halflife_days_highCI_longley_recalc = log(0.5)/conf.high) %>%
  mutate(halflife_months_longley_recalc = halflife_days_longley_recalc/30) %>%
  mutate(halflife_years_longley_recalc = halflife_days_longley_recalc/365.25) %>%
  mutate(across(where(is.numeric), signif, 2)) %>%
  dplyr::filter(term == "Day") %>%
  mutate(significant = ifelse(.$"BH-adjusted.p.value"<0.05, "*", "NS")) %>%
  mutate("BH-adjusted.p.value" = formatC(.$"BH-adjusted.p.value", format = "e", digits = 2)) %>%
  arrange(desc(halflife_days_longley_recalc)) %>%
  dplyr::select(Antigen, estimate, std.error, "BH-adjusted.p.value", significant, halflife_days_longley_recalc, halflife_days_lowCI_longley_recalc, halflife_days_highCI_longley_recalc)

lmer_adj_res_longley_short <- lmer_adj_res_longley %>%
  dplyr::select(Antigen, contains("halflife"))

lmer_adj_res_longley_recalc <- protein_dat %>%
  right_join(., lmer_unadj_res,
             by = "Antigen") %>%
  left_join(., lmer_adj_res_longley_short,
            by = "Antigen") %>%
  select(Antigen, Protein, everything()) %>%
  rename(PlasmoDB_Accession = Protein) %>%
  mutate(overlapping_CIs = ifelse(
    (halflife_days_highCI_longley_recalc < halflife_days_lowCI) |
      (halflife_days_highCI < halflife_days_lowCI_longley_recalc) ,
          "no", "yes"))

lmer_adj_res_longley_recalc %>%
  filter(overlapping_CIs == "yes") %>%
  select(Antigen)
##      Antigen
## 1       Pv12
## 2       Pv41
## 3 PVX_081550
## 4    PvRBP2b
lmer_adj_res_longley_recalc %>%
  filter(overlapping_CIs == "no") %>%
  select(Antigen)
##    Antigen
## 1  PvMSP3a
## 2   PvGAMA
## 3 PvDBP-FL

11.2 Plot Forest Plot comparing Tenshi et al vs Longley et al